home *** CD-ROM | disk | FTP | other *** search
- #include <stdio.h>
- #include <string.h>
-
- #include "config.h"
- #include "lint.h"
- #include "rc.h"
-
- /*
- * stralloc.c - string management.
- *
- * All strings are stored in an extensible hash table, with reference counts.
- * free_string decreases the reference count; if it gets to zero, the string
- * will be deallocated. add_string increases the ref count if it finds a
- * matching string, or allocates it if it cant. There is no way to allocate
- * a string of a particular size to fill later (hash wont work!), so you'll
- * have to copy things freom a static (or malloced and later freed) buffer -
- * that is, if you want to avoid space leaks...
- *
- * Current overhead:
- * 8 bytes per string (next pointer, and 2 shorts for length and refs)
- * Strings are nearly all fairly short, so this is a significant overhead-
- * there is also the 4 byte malloc overhead and the fact that malloc
- * generally allocates blocks which are a power of 2 (should write my
- * own best-fit malloc specialised to strings); then again, GNU malloc
- * is bug free...
- */
-
- /*
- * there is also generic hash table management code, but strings can be shared
- * (that was the point of this code), will be unique in the table,
- * require a reference count, and are malloced, copied and freed at
- * will by the string manager. Besides, I wrote this code first :-).
- * Look at htable.c for the other code. It uses the Hash() function
- * defined here, and requires hashed objects to have a pointer to the
- * next element in the chain (which you specify when you call the functions).
- */
-
- #define MAXSHORT (1 << (sizeof(short)*8 - 2))
- #define WORD_ALIGN_BIT 0x3 /* these are 0 for aligned ptrs */
-
- char * xalloc();
- #ifndef _AIX
- char * strcpy();
- #endif
-
- static int num_distinct_strings = 0;
- int bytes_distinct_strings = 0;
- static int allocd_strings = 0;
- static int allocd_bytes = 0;
- int overhead_bytes = 0;
- static int search_len = 0;
- static int num_str_searches = 0;
-
- /*
- * strings are stored:
- * (char * next) (short numrefs) string
- * ^
- * pointer points here
- */
-
- #define NEXT(str) (*(char **)((char *) (str) - sizeof(short) \
- - sizeof(int)))
- #define REFS(str) (*(short *)((char *) (str) - sizeof(short)))
-
- /*
- * hash table - list of pointers to heads of string chains.
- * Each string in chain has a pointer to the next string and a
- * reference count (char *, int) stored just before the start of the string.
- * HTABLE_SIZE is in config.h, and should be a prime, probably between
- * 1000 and 5000.
- */
-
- static char ** base_table = 0;
-
- void init_strings()
- {
- int x;
- base_table = (char **) xalloc(sizeof(char *) * HTABLE_SIZE);
- overhead_bytes += (sizeof(char *) * HTABLE_SIZE);
-
- for (x=0; x<HTABLE_SIZE; x++)
- base_table[x] = 0;
- }
-
- /*
- * generic hash function. This is probably overkill; I haven't checked the
- * stats for different prime numbers, etc.
- */
-
- static int StrHash(s)
- char * s;
- {
- return hashstr(s, 20, HTABLE_SIZE);
- }
-
- /*
- * Looks for a string in the table. If it finds it, returns a pointer to
- * the start of the string part, and moves the entry for the string to
- * the head of the pointer chain. One thing (blech!) - puts the previous
- * pointer on the hash chain into fs_prev.
- */
-
- char * findstring(s)
- char * s;
- {
- char * curr, *prev;
- int h = StrHash(s);
-
- curr = base_table[h];
- prev = 0;
- num_str_searches++;
-
- while (curr) {
- search_len++;
- if (*curr == *s && !strcmp(curr, s)) { /* found it */
- if (prev) { /* not at head of list */
- NEXT(prev) = NEXT(curr);
- NEXT(curr) = base_table[h];
- base_table[h] = curr;
- }
- return(curr); /* pointer to string */
- }
- prev = curr;
- curr = NEXT(curr);
- }
-
- return(0); /* not found */
- }
-
- /*
- * Make a space for a string. This is rather nasty, as we want to use
- * alloc/free, but malloc overhead is generally severe. Later, we
- * will have to compact strings...
- */
-
- static char * alloc_new_string(string)
- char * string;
- {
- char * s = xalloc(1 + strlen(string) + sizeof(char *) + sizeof(short));
- int h = StrHash(string);
-
- s += sizeof(char *) + sizeof(short);
- strcpy(s, string);
- REFS(s) = 0;
- NEXT(s) = base_table[h];
- base_table[h] = s;
- num_distinct_strings++;
- bytes_distinct_strings += 4 + (strlen(s) +3) & ~3;
- overhead_bytes += sizeof(char *) + sizeof(short);
- return(s);
- }
-
- char * make_shared_string(str)
- char * str;
- {
- char * s;
-
- s = findstring(str);
- if (!s)
- s = alloc_new_string(str);
- REFS(s)++;
- allocd_strings++;
- allocd_bytes += 4 + (strlen(str) + 3) & ~3;
- return(s);
- }
-
- /*
- * free_string - reduce the ref count on a string. Various sanity
- * checks applied, the best of them being that a add_string allocated
- * string will point to a word boundary + sizeof(int)+sizeof(short),
- * since malloc always allocates on a word boundary.
- * On systems where a short is 1/2 a word, this gives an easy check
- * to see whather we did in fact allocate it.
- *
- * Don't worry about the overhead for all those checks!
- */
-
- /*
- * function called on free_string detected errors; things return checked(s).
- */
-
- static void checked(s, str) char * s, *str;
- {
- fprintf(stderr, "%s (\"%s\")\n", s, str);
- fatal(s); /* brutal - debugging */
- }
-
- void free_string(str)
- char * str;
- {
- char * s;
-
- allocd_strings--;
- allocd_bytes -= 4 + (strlen(str) + 3) & ~3;
-
- #ifndef BUG_FREE
- #ifdef dcheck /* GNU malloc range check flag */
- { int align;
- align = (((int)str) - sizeof(int) - sizeof(short)) & WORD_B_MASK;
- if (align)
- checked("Free string: improperly aligned string!", str);
- }
- #endif /* dcheck */
- #endif
-
- s = findstring(str); /* moves it to head of table if found */
- #ifndef BUG_FREE
- if (!s) {
- checked("Free string: not found in string table!", str);
- return;
- }
- if (s != str) {
- checked("Free string: string didnt hash to the same spot!", str);
- return;
- }
-
- if (REFS(s) <= 0) {
- checked("Free String: String refs zero or -ve!", str);
- return;
- }
- #endif /* BUG_FREE */
-
- if (REFS(s) > MAXSHORT) return;
- REFS(s)--;
- if (REFS(s) > 0) return;
-
- /* It will be at the head of the hash chain */
- base_table[StrHash(str)] = NEXT(s);
- num_distinct_strings--;
- /* We know how much overhead malloc has */
- bytes_distinct_strings-= 4 + (strlen(s) + 3) & ~3;
- overhead_bytes -= sizeof(short) + sizeof(char *);
- xfree(s-sizeof(short)-sizeof(char *));
-
- return;
- }
-
- /*
- * you think this looks bad! and we didn't even tell them about the
- * GNU malloc overhead! tee hee!
- */
-
- int add_string_status(verbose)
- int verbose;
- {
- if (verbose) {
- add_message("\nShared string hash table:\n");
- add_message("-------------------------\t Strings Bytes\n");
- }
- add_message("Strings malloced\t\t%8d %8d + %d overhead\n",
- num_distinct_strings, bytes_distinct_strings, overhead_bytes);
- if (verbose) {
- add_message("Total asked for\t\t\t%8d %8d\n",
- allocd_strings, allocd_bytes);
- add_message("Space actually required/total string bytes %d%%\n",
- (bytes_distinct_strings + overhead_bytes)*100 / allocd_bytes);
- add_message("Searches: %d Average search length: %6.3f\n",
- num_str_searches, (double)search_len / num_str_searches);
- }
- return(bytes_distinct_strings + overhead_bytes);
- }
-